In [1]:
%run "0. config.ipynb"
In [2]:
# Load the per-game player statistics.
# DataFrame.from_csv was removed from pandas; read_csv with index_col=0 and
# parse_dates=True reproduces its defaults (first column as a date-parsed index).
players_stats = pd.read_csv("data/players_stats.csv", index_col=0, parse_dates=True)
In [4]:
# Number of distinct sessionId values in the data set.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(len(players_stats["sessionId"].unique()))
In [57]:
# Tag every row with 1 so that summing "count" per session gives the number of
# rows (games) recorded for that sessionId.
players_stats["count"] = 1
# Per-session totals of "count" and "complete".
# NOTE(review): "complete" is presumably a 0/1 completion flag, making its sum
# the number of completed games - confirm against the data.
# The string "sum" is used instead of np.sum: recent pandas warns when a NumPy
# callable is passed to agg, while the string form works on every version.
session_id = players_stats.groupby(["sessionId"]).agg({"count": "sum", "complete": "sum"})
session_id.head()
Out[57]:
In [58]:
# Histogram of the per-session "complete" totals.
session_id.hist(column="complete")
Out[58]:
In [60]:
# Scatter plot: per-session "count" total on x against "complete" total on y.
session_id.plot(x="count", y="complete", kind="scatter")
Out[60]:
Only one computer, which was used to play 13 games, completed 5 games. Most games were played at a ratio of 1 game per computer.
In [56]:
# Build a frequency table: for each per-session "count" value ("class"),
# how many sessions have that value.
df = pd.DataFrame()
df["class"] = session_id["count"]
df = df.reset_index()
# axis is passed by keyword: the positional form was removed in pandas 2.0.
df = df.drop("sessionId", axis=1)
df["count"] = 1
df = df.groupby(["class"]).agg({"count": "sum"})

# Sanity check, computed two ways: sum over classes of (class value * number of
# sessions in that class). Both prints should show the same total.
print(np.sum([i * x["count"] for i, x in df.iterrows()]))
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
A = df.reset_index().values
print(A[:, 0].dot(A[:, 1].T))
df.head(20)
Out[56]: